
# ======================================================================
# Project:    Closed borders, closed minds? COVID-related border closures,
#             EU support and hostility towards immigrants
#
# Script:     Table 1 + Appendix Table B1
#
# Authors:    Lisa Herbig (l.j.herbig@uva.nl)
#             Asli Unan (a.unan@uva.nl)
#
# Date:       2025-03-24
# ======================================================================

# Description:
# This script generates Table 1 and Appendix Table B1, which report the number of interviews per week in border versus core regions.

# ----------------------------------------------------------------------
# Load libraries
# ----------------------------------------------------------------------

### Packages
library("estimatr")
library("dplyr")
library("foreign")
library("ggplot2")
library("haven")
library("lmtest")
library("lubridate")
library("modelsummary")
library("rdrobust")
library("readr")     
library("readxl")
library("tibble")
library("tidyquant")
library("tidyr")
library("tidyverse")

# Print tibbles in full: no cap on the number of rows and no column truncation.
options(
  tibble.print_max = Inf,
  tibble.width = Inf
)

# ---- Bring in the individual-level dataset ----------------------------------
df_long_ind <- readRDS("~/work/November_2024/R_out/df_long_ind.rds")

# Recode closest_country into a numeric country code. Code 5 is left out of
# the lookup on purpose: it is assigned below to core-region respondents.
# Any country name that is not listed here ends up as NA.
neighbour_names <- c(
  "Austria", "Belgium", "Switzerland", "Czechia", "Denmark",
  "France", "Luxembourg", "Netherlands", "Poland"
)
neighbour_codes <- c(1, 2, 3, 4, 6, 7, 8, 9, 10)

df_long_ind <- mutate(
  df_long_ind,
  new_country_code_closest =
    neighbour_codes[match(closest_country, neighbour_names)]
)

table(df_long_ind$new_country_code_closest)

# Same code, except that respondents whose dist_kreis exceeds 25 (the core
# regions) are coded as 5. A missing dist_kreis yields NA.
# NOTE(review): the unit of dist_kreis is not visible here (presumably km).
df_long_ind <- mutate(
  df_long_ind,
  new_country_code_closest25 =
    if_else(dist_kreis <= 25, new_country_code_closest, 5)
)

# Frequency checks of the new code and of the border-region indicators
# already present in the data.
table(df_long_ind$new_country_code_closest25)
table(df_long_ind$border_region_25)

table(df_long_ind$border_region_50)

table(df_long_ind$border_region_ind_25)
table(df_long_ind$border_region_ind_50)

    
# ---- Bring in the NEW border-closure dataset --------------------------------

NEW_border_closures <- read_csv(
  "~/work/2023-02-24/R_in/final_border_closure_data.csv"
)

names(NEW_border_closures)

# Parse the month/day/year `date` column into a Date and derive a "year-week"
# key from it (%W = week of the year, weeks starting on Monday).
NEW_border_closures$newdate <- as.Date(
  format(as.Date(NEW_border_closures$date, "%m/%d/%Y"))
)
NEW_border_closures$newweekyear <- format(
  as.Date(NEW_border_closures$newdate),
  "%Y-%W"
)

# Days of 2020 that fall before the first Monday get week "00"; relabel them
# as the last week of 2019.
is_week_zero <- NEW_border_closures$newweekyear == "2020-00"
NEW_border_closures$newweekyear[is_week_zero] <- "2019-52"

# Assign 0 on every closure/quarantine measure for the core regions
# (country_code 5).
is_core_row <- NEW_border_closures$country_code == 5
core_zero_vars <- c(
  "internal_closure", "external_closure", "closure_both",
  "quarantine_continuous", "risk_areas", "quarantine_until_june"
)
for (closure_var in core_zero_vars) {
  NEW_border_closures[[closure_var]][is_core_row] <- 0
}

######################################################CREATE  DATASET
######### MERGE BASED ON NEW_COUNTRY_CODE_CLOSEST25 - CORE REGIONS CODED AS UNTREATED (=0)
## Merge df_long_ind with the border closures on date (date = newdate) and country code (new_country_code_closest25 = country_code)
# Attach the daily border-closure measures to every interview. Rows are
# matched on the 25-band country code (core respondents carry code 5) and on
# the interview date.
# The previous version had a stray leading "+" (a pasted console continuation
# prompt) in front of reduce(), which made the statement fail; a direct
# left_join() of the two tables is equivalent to the intended reduce().
FINAL_df_long_ind_CORE25 <- left_join(
  df_long_ind,
  NEW_border_closures,
  by = c("new_country_code_closest25" = "country_code", "date" = "newdate")
)

# "Any closure" treatment indicator:
#   1  if the internal or the external closure flag equals 1,
#   0  if both flags equal 0,
#   NA in every other case (e.g. flags missing because no closure row matched).
FINAL_df_long_ind_CORE25 <- FINAL_df_long_ind_CORE25 %>%
  mutate(
    any_closure = case_when(
      internal_closure == 1 | external_closure == 1 ~ 1,
      internal_closure == 0 & external_closure == 0 ~ 0,
      TRUE ~ NA_real_
    )
  )

# East dummy: flags respondents whose closest neighbouring country has code
# 1 (Austria), 4 (Czechia) or 10 (Poland); missing codes count as not flagged.
# NOTE(review): as.numeric(as.factor(.)) stores factor level positions, so the
# flag ends up coded 1/2 (not 0/1) whenever both values occur - confirm that
# this coding is intended.
closest_code <- FINAL_df_long_ind_CORE25$new_country_code_closest
east_flag <- closest_code == 1 | closest_code == 4 | closest_code == 10
FINAL_df_long_ind_CORE25$East <-
  as.numeric(as.factor(as.numeric(east_flag %in% TRUE)))

# Lockdown dummy: flags rows where code_m18_030 equals 1, 2 or 3; missing
# values count as not flagged. The same 1/2 level coding applies as above.
# NOTE(review): the meaning of code_m18_030 is not visible in this script.
lockdown_code <- FINAL_df_long_ind_CORE25$code_m18_030
lockdown_flag <- lockdown_code == 1 | lockdown_code == 2 | lockdown_code == 3
FINAL_df_long_ind_CORE25$lockdown <-
  as.numeric(as.factor(as.numeric(lockdown_flag %in% TRUE)))
                                                                                                                                                             
### Add infection rates of the neighbouring country
border_region_infection_rate <- read_csv(
  "~/work/November_2024/R_in/border_region_infection_rate.csv"
)

# Translate country names into the numeric codes used for
# new_country_code_closest (5 is not assigned); unlisted names become NA.
infection_country_names <- c(
  "Austria", "Belgium", "Switzerland", "Czechia", "Denmark",
  "France", "Luxembourg", "Netherlands", "Poland"
)
infection_country_codes <- c(1, 2, 3, 4, 6, 7, 8, 9, 10)

border_region_infection_rate <- mutate(
  border_region_infection_rate,
  iso = infection_country_codes[match(country, infection_country_names)]
)

# Match on calendar week and on the closest neighbouring country.
# NOTE(review): assumes year_week uses the same "%Y-%W" labels as newweekyear
# - confirm against the CSV.
FINAL_df_long_ind_CORE25 <- left_join(
  FINAL_df_long_ind_CORE25,
  border_region_infection_rate,
  by = c("newweekyear" = "year_week", "new_country_code_closest" = "iso")
)


# Coerce the analysis variables to the types used downstream:
# continuous measures become numeric, categorical ones become factors.
numeric_vars <- c(
  "dist_ind", "Voting_2018_GALTAN", "birthyear", "var_rate_14_day_per_100k"
)
factor_vars <- c(
  "german_citizenship", "occupational_status", "sex",
  "interview_completion", "work_abroad", "CONTACTABROAD"
)

for (num_var in numeric_vars) {
  FINAL_df_long_ind_CORE25[[num_var]] <-
    as.numeric(FINAL_df_long_ind_CORE25[[num_var]])
}

for (fct_var in factor_vars) {
  FINAL_df_long_ind_CORE25[[fct_var]] <-
    as.factor(FINAL_df_long_ind_CORE25[[fct_var]])
}

# Quick look at the region code used for the descriptive tables below.
summary(FINAL_df_long_ind_CORE25$iso2.y)

############################################################################
###################DESCRIPTIVE ANALYSIS INDIVIDUAL LEVEL####################
############################################################################

# Label each region code (iso2.y, values 1-10) with a country name. The
# position in this vector is the code, so code 5 maps to "Germany".
# NOTE(review): iso2.y carries a join suffix; which merge it stems from is not
# visible here - confirm it is the intended region code.
region_names <- c(
  "Austria", "Belgium", "Switzerland", "Czechia", "Germany",
  "Denmark", "France", "Luxemburg", "Netherlands", "Poland"
)
for (region_code in seq_along(region_names)) {
  is_region <- FINAL_df_long_ind_CORE25$iso2.y == region_code
  FINAL_df_long_ind_CORE25$iso2_name[is_region] <- region_names[region_code]
}

# Border vs. core: code 5 is the core region, every other non-missing code is
# a border region.
in_core <- FINAL_df_long_ind_CORE25$iso2.y == 5
FINAL_df_long_ind_CORE25$Border_Core[!in_core] <- "Border region"
FINAL_df_long_ind_CORE25$Border_Core[in_core] <- "Core region"

# Keep only the rows that received a region label.
df_mini <- drop_na(FINAL_df_long_ind_CORE25, iso2_name)

# Subset the dataset to survey year 2020.
# The comparison operator was missing (`filter(syear 2019)` does not parse);
# `>` restores a valid condition.
# NOTE(review): assumes the data hold no survey year after 2020, so that
# `syear > 2019` keeps exactly the 2020 wave - confirm, or use `syear == 2020`.
df_mini_2020 <- df_mini %>%
  filter(syear > 2019)

# Interviews per week - Table 1 and Appendix Table B1
table(df_mini_2020$weekyear)

# Number of interviews in each week, split into border and core regions.
interviews_per_week_BC <- summarise(
  group_by(df_mini_2020, weekyear, Border_Core),
  interviews = n()
)

print(interviews_per_week_BC)

# Label each interview as falling before or after the "11-2020" cut-off.
# NOTE(review): this is a string comparison; it orders weeks correctly only if
# weekyear is a zero-padded "WW-YYYY" string within a single year - confirm.
df_mini_2020 <- mutate(
  df_mini_2020,
  week_period = ifelse(
    weekyear < "11-2020",
    "Before Closure",
    "After Closure"
  )
)

#FOR NORMALIZED VALUES
# Min-max rescale a vector to the [0, 1] interval.
#
# x: numeric vector. NA entries are ignored when the range is determined and
#    stay NA in the result.
# Returns a vector of the same length as x, computed as
# (x - min(x)) / (max(x) - min(x)).
# A vector whose non-missing values are all equal has a zero range and
# therefore yields NaN.
normalize <- function(x) {
  lowest <- min(x, na.rm = TRUE)
  data_range <- max(x, na.rm = TRUE) - lowest
  (x - lowest) / data_range
}

# Add a min-max normalised copy (suffix "_n") of every outcome variable, in
# this order. immigration_pca is additionally multiplied by -1 after
# normalising, i.e. its direction is reversed.
outcome_vars <- c(
  "con_europe", "AfD_2018", "immigration_pca", "refugee_eco",
  "refugee_culture", "refugee_ger_living",
  "Voting_2018_GALTAN", "Voting_2018_LeftRight"
)
for (outcome in outcome_vars) {
  scaled <- normalize(df_mini_2020[[outcome]])
  if (outcome == "immigration_pca") {
    scaled <- scaled * (-1)
  }
  df_mini_2020[[paste0(outcome, "_n")]] <- scaled
}

# NA-tolerant mean and standard deviation used by the two summaries below.
mean_na <- function(v) mean(v, na.rm = TRUE)
sd_na <- function(v) sd(v, na.rm = TRUE)

# Means and SDs of the raw outcomes by region type (border/core) and period
# (before/after closure), plus the group size. The mean of immigration_pca is
# reported with reversed sign.
mean_comparison <- summarise(
  group_by(df_mini_2020, Border_Core, week_period),
  mean_con_europe = mean_na(con_europe),
  sd_con_europe = sd_na(con_europe),
  mean_refugee_eco = mean_na(refugee_eco),
  sd_refugee_eco = sd_na(refugee_eco),
  mean_refugee_culture = mean_na(refugee_culture),
  sd_refugee_culture = sd_na(refugee_culture),
  mean_refugee_ger_living = mean_na(refugee_ger_living),
  sd_refugee_ger_living = sd_na(refugee_ger_living),
  mean_left_right = mean_na(Voting_2018_LeftRight),
  sd_left_right = sd_na(Voting_2018_LeftRight),
  mean_immigration_pca = mean_na(immigration_pca) * (-1),
  sd_immigration_pca = sd_na(immigration_pca),
  mean_afd = mean_na(AfD_2018),
  sd_afd = sd_na(AfD_2018),
  mean_GALTAN = mean_na(Voting_2018_GALTAN),
  sd_GALTAN = sd_na(Voting_2018_GALTAN),
  mean_dist_ind = mean_na(dist_ind),
  sd_dist_ind = sd_na(dist_ind),
  mean_any_closure = mean_na(any_closure),
  sd_any_closure = sd_na(any_closure),
  n = n()
)

print(mean_comparison)

# Same comparison for the normalised ("_n") outcomes.
# NOTE(review): immigration_pca_n already carries a factor of -1 from its
# construction, so multiplying its mean by -1 again flips the sign back -
# confirm that this is intended.
mean_comparison_n <- summarise(
  group_by(df_mini_2020, Border_Core, week_period),
  mean_con_europe_n = mean_na(con_europe_n),
  sd_con_europe_n = sd_na(con_europe_n),
  mean_refugee_eco_n = mean_na(refugee_eco_n),
  sd_refugee_eco_n = sd_na(refugee_eco_n),
  mean_refugee_culture_n = mean_na(refugee_culture_n),
  sd_refugee_culture_n = sd_na(refugee_culture_n),
  mean_refugee_ger_living_n = mean_na(refugee_ger_living_n),
  sd_refugee_ger_living_n = sd_na(refugee_ger_living_n),
  mean_left_right_n = mean_na(Voting_2018_LeftRight_n),
  sd_left_right_n = sd_na(Voting_2018_LeftRight_n),
  mean_immigration_pca_n = mean_na(immigration_pca_n) * (-1),
  sd_immigration_pca_n = sd_na(immigration_pca_n),
  mean_afd_n = mean_na(AfD_2018_n),
  sd_afd_n = sd_na(AfD_2018_n),
  mean_GALTAN_n = mean_na(Voting_2018_GALTAN_n),
  sd_GALTAN_n = sd_na(Voting_2018_GALTAN_n),
  mean_any_closure = mean_na(any_closure),
  sd_any_closure = sd_na(any_closure),
  n = n()
)

print(mean_comparison_n)

# Total number of individuals by region type (border vs. core)
table(df_mini_2020$Border_Core)


# Total number of treated (any_closure == 1) and untreated individuals
table(df_mini_2020$any_closure)


# Treated individuals only, tabulated by region type. Despite its name,
# df_mini_2020_BORDER holds every treated row; the table below shows how many
# of them live in a border region.
df_mini_2020_BORDER <- filter(df_mini_2020, any_closure == 1)

table(df_mini_2020_BORDER$Border_Core)

# Mean and SD of every raw outcome for an already grouped data frame.
# The mean of immigration_pca is reported with reversed sign.
summarise_raw_outcomes <- function(grouped) {
  summarise(
    grouped,
    mean_con_europe = mean(con_europe, na.rm = TRUE),
    sd_con_europe = sd(con_europe, na.rm = TRUE),
    mean_refugee_eco = mean(refugee_eco, na.rm = TRUE),
    sd_refugee_eco = sd(refugee_eco, na.rm = TRUE),
    mean_refugee_culture = mean(refugee_culture, na.rm = TRUE),
    sd_refugee_culture = sd(refugee_culture, na.rm = TRUE),
    mean_refugee_ger_living = mean(refugee_ger_living, na.rm = TRUE),
    sd_refugee_ger_living = sd(refugee_ger_living, na.rm = TRUE),
    mean_left_right = mean(Voting_2018_LeftRight, na.rm = TRUE),
    sd_left_right = sd(Voting_2018_LeftRight, na.rm = TRUE),
    mean_immigration_pca = mean(immigration_pca, na.rm = TRUE) * (-1),
    sd_immigration_pca = sd(immigration_pca, na.rm = TRUE),
    mean_afd = mean(AfD_2018, na.rm = TRUE),
    sd_afd = sd(AfD_2018, na.rm = TRUE),
    mean_GALTAN = mean(Voting_2018_GALTAN, na.rm = TRUE),
    sd_GALTAN = sd(Voting_2018_GALTAN, na.rm = TRUE),
    mean_dist_ind = mean(dist_ind, na.rm = TRUE),
    sd_dist_ind = sd(dist_ind, na.rm = TRUE),
    mean_any_closure = mean(any_closure, na.rm = TRUE),
    sd_any_closure = sd(any_closure, na.rm = TRUE)
  )
}

# Mean and SD of every normalised ("_n") outcome for a grouped data frame.
# NOTE(review): immigration_pca_n already carries a factor of -1 from its
# construction, so multiplying its mean by -1 again flips the sign back -
# confirm that this is intended.
summarise_normalised_outcomes <- function(grouped) {
  summarise(
    grouped,
    mean_con_europe_n = mean(con_europe_n, na.rm = TRUE),
    sd_con_europe_n = sd(con_europe_n, na.rm = TRUE),
    mean_refugee_eco_n = mean(refugee_eco_n, na.rm = TRUE),
    sd_refugee_eco_n = sd(refugee_eco_n, na.rm = TRUE),
    mean_refugee_culture_n = mean(refugee_culture_n, na.rm = TRUE),
    sd_refugee_culture_n = sd(refugee_culture_n, na.rm = TRUE),
    mean_refugee_ger_living_n = mean(refugee_ger_living_n, na.rm = TRUE),
    sd_refugee_ger_living_n = sd(refugee_ger_living_n, na.rm = TRUE),
    mean_left_right_n = mean(Voting_2018_LeftRight_n, na.rm = TRUE),
    sd_left_right_n = sd(Voting_2018_LeftRight_n, na.rm = TRUE),
    mean_immigration_pca_n = mean(immigration_pca_n, na.rm = TRUE) * (-1),
    sd_immigration_pca_n = sd(immigration_pca_n, na.rm = TRUE),
    mean_afd_n = mean(AfD_2018_n, na.rm = TRUE),
    sd_afd_n = sd(AfD_2018_n, na.rm = TRUE),
    mean_GALTAN_n = mean(Voting_2018_GALTAN_n, na.rm = TRUE),
    sd_GALTAN_n = sd(Voting_2018_GALTAN_n, na.rm = TRUE)
  )
}

# Raw outcomes: border regions vs. core region
mean_sd_values <- summarise_raw_outcomes(
  group_by(df_mini_2020, Border_Core)
)

print(mean_sd_values, n = Inf)

# Raw outcomes: by neighbouring-country region
mean_sd_values_region <- summarise_raw_outcomes(
  group_by(df_mini_2020, iso2_name)
)

print(mean_sd_values_region)

# Normalised outcomes: border regions vs. core region
mean_sd_values_n <- summarise_normalised_outcomes(
  group_by(df_mini_2020, Border_Core)
)

print(mean_sd_values_n)

# Normalised outcomes: by neighbouring-country region
mean_sd_values_region_n <- summarise_normalised_outcomes(
  group_by(df_mini_2020, iso2_name)
)

print(mean_sd_values_region_n)

# Close an output diversion only if one is active. This script never opens a
# sink itself, so an unconditional sink() would only raise a
# "no sink to remove" warning; a diversion opened by a calling session is
# still closed as before.
if (sink.number() > 0L) {
  sink()
}
